diff --git "a/compression_config.json" "b/compression_config.json" new file mode 100644--- /dev/null +++ "b/compression_config.json" @@ -0,0 +1,196679 @@ +[ + { + "memory": 669653120, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 1, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 1, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 1, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 1, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 1, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 670308480, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 1, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 1, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 1, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 1, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 672733312, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 1, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 1, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 1, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 673388672, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 1, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 1, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 674044032, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 1, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 674699392, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 1, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 677124224, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 1, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 677779584, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 1, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 680204416, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 1, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 682629248, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 1, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 682842240, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 1, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 683497600, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 1, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 685922432, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 1, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 686135424, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 1, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 686348416, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 1, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 688773248, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 1, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 688986240, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 1, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 691411072, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 1, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 693835904, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 1, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 694491264, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 1, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 696916096, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 1, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 699340928, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 1, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 699553920, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 1, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 701978752, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 1, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 704403584, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 1, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 705058944, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 1, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 705271936, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 1, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 705484928, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 1, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 706140288, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 1, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 706353280, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 1, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 706566272, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 1, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 708991104, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 1, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 711415936, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 1, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 713840768, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 1 + } + }, + { + "memory": 716265600, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 1, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 716920960, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 1, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 719345792, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 1, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 719558784, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 1, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 720214144, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 1, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 720427136, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 1, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 720640128, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 1, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 723064960, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 1, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 723277952, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 1, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 723490944, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 1, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 723703936, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 1, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 724359296, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 1, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 724572288, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 1, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 726997120, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 1, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 727652480, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 1, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 730077312, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 1, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 732502144, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 1, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 734926976, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 1, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 737351808, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 1, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 738007168, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 1, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 738662528, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 1, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 741087360, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 1, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 741300352, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 1, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 741513344, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 1, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 742168704, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 1, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 742824064, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 1, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 743037056, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 1, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 743250048, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 1, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 743905408, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 1, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 744118400, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 1, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 744773760, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 1, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 744986752, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 1, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 747411584, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 1, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 749836416, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 1, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 752261248, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 1, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 752916608, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 1, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 753129600, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 1, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 753784960, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 1, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 754440320, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 1, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 756865152, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 1, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 757520512, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 1, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 759945344, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 1, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 762370176, + "layers": { + "layer.0.self_attn.q_proj": 1, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 763025536, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 1, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 763680896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 1, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 766105728, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 1, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 766318720, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 1, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 766531712, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 1, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 767187072, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 1, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 769611904, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 1, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 769824896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 1, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 772249728, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 1, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 772905088, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 1, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 775329920, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 1, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 775985280, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 1, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 778410112, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 1, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 779065472, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 1, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 779720832, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 1, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 782145664, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 1, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 784570496, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 1, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 786995328, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 1, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 789420160, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 1, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 789633152, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 1, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 789846144, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 1, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 790059136, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 1, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 792483968, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 1, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 792696960, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 1, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 795121792, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 1, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 797546624, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 1, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 799971456, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 1, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 802396288, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 1, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 803051648, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 1, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 805476480, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 1, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 807901312, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 1, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 810326144, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 1, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 812750976, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 1, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 813406336, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 1, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 813619328, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 1, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 813832320, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 2, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 816257152, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 2, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 816912512, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 2, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 819337344, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 2, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 821762176, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 2, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 824187008, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 2, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 826611840, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 2, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 829036672, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 2, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 829249664, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 2, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 829905024, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 2, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 830560384, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 2, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 832985216, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 2, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 835410048, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 2, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 837834880, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 2, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 838047872, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 2, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 838703232, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 2, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 838916224, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 2, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 841341056, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 2, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 841996416, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 2, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 842651776, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 2, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 845076608, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 2, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 847501440, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 2, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 849926272, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 2, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 852351104, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 2, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 854775936, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 2, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 857200768, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 2, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 857856128, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 2, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 858511488, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 2, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 860936320, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 2, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 863361152, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 2, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 864016512, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 2, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 866441344, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 2, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 867096704, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 2, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 867752064, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 2, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 870176896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 2, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 872601728, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 2, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 872814720, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 2, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 873027712, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 2, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 875452544, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 2, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 875665536, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 2, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 876320896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 2, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 876976256, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 2, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 879401088, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 2, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 880056448, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 2, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 882481280, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 2, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 882694272, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 2, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 882907264, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 2, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 885332096, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 2, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 885545088, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 2, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 887969920, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 2, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 888182912, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 2, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 890607744, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 2, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 893032576, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 2, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 893687936, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 2, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 896112768, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 2, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 898537600, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 2, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 898750592, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 2, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 901175424, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 2, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 901388416, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 2, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 901601408, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 2, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 904026240, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 2, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 906451072, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 2, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 908875904, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 2, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 909531264, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 2, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 909744256, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 2, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 909957248, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 2, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 912382080, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 2, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 914806912, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 2, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 915019904, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 2, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 915232896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 2, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 915888256, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 2, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 916543616, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 2, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 916756608, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 2, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 916969600, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 2, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 919394432, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 2, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 920049792, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 2, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 920705152, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 2, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 921360512, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 2, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 923785344, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 2 + } + }, + { + "memory": 926210176, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 2, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 926865536, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 2, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 929290368, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 2, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 931715200, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 2, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 932370560, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 2, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 932583552, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 2, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 932796544, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 2, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 935221376, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 2, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 935434368, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 2, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 936089728, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 2, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 936302720, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 2, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 936958080, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 2, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 939382912, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 2, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 939595904, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 2, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 939808896, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 2, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 942233728, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 2, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 942889088, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 2, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 943544448, + "layers": { + "layer.0.self_attn.q_proj": 2, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 944199808, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 2, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 944855168, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 2, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 945510528, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 2, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 945723520, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 2, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 945936512, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 2, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 946591872, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 2, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 946804864, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 2, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 949229696, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 2, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 949442688, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 2, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 951867520, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 2, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 954292352, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 2, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 954505344, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 2, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 956930176, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 2, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 957143168, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 2, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 957356160, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 2, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 958011520, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 3, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 960436352, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 3, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 962861184, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 3, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 965286016, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 3, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 967710848, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 3, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 970135680, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 3, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 970348672, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 3, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 971004032, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 3, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 971659392, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 3, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 972314752, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 3, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 972970112, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 3, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 973625472, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 3, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 976050304, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 3, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 976263296, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 3, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 976918656, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 3, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 977574016, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 3, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 979998848, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 3, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 982423680, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 3, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 984848512, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 3, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 987273344, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 3, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 989698176, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 3, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 992123008, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 3, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 994547840, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 3, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 996972672, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 3, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 997185664, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 3, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 999610496, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 3, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1002035328, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 3, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1002248320, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 3, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1004673152, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 3, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1004886144, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 3, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1007310976, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 3, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1007966336, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 3, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1010391168, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 3, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1012816000, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 3, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1015240832, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 3, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1017665664, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 3, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1020090496, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 3, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1020745856, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 3, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1023170688, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 3, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1025595520, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 3, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1025808512, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 3, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1026463872, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 3, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1027119232, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 3, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1027774592, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 3, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1030199424, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 3, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1030854784, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 3, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1031510144, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 3, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1031723136, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 3, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1034147968, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 3, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1034803328, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 3, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1037228160, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 3, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1039652992, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 3, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1039865984, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 3 + } + }, + { + "memory": 1042290816, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 3, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1042503808, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 3, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1044928640, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 3, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1045141632, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 3, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1045796992, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 3, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1046452352, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 3, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1046665344, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 3, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1047320704, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 3, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1047976064, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 3, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1050400896, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 3, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1052825728, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 3, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1053038720, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 3, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1055463552, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 3, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1056118912, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 3, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1056331904, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 3, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1058756736, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 3, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1061181568, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 3, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1061394560, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 3, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1061607552, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 3, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1064032384, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 3, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1064245376, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 3, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1066670208, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 3, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1067325568, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 3, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1069750400, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 3, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1070405760, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 3, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1072830592, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 3, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1073043584, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 3, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1073698944, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 3, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1073911936, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 3, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1074124928, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 3, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1074337920, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 3, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1074993280, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 3, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1075206272, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 3, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1075861632, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 3, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1078286464, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 3, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1080711296, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 3, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1080924288, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 3, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1081137280, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 3, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1081792640, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 3, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1082005632, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 3, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1082660992, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 3, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1082873984, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 3, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1083529344, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 3, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1083742336, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 3, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1084397696, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 3, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1086822528, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 3, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1089247360, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 3, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1089902720, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 3, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1092327552, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 3, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1092540544, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 3, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1092753536, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 3, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1092966528, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 3, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1093179520, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 3, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1095604352, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 3, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1095817344, + "layers": { + "layer.0.self_attn.q_proj": 3, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1096472704, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 3, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1098897536, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 3, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1101322368, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 3, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1101977728, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 4, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 3, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1102190720, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 4, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 4, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1104615552, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 4, + "layer.5.mlp.gate_proj": 4, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1107040384, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 4, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1107695744, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 4, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1110120576, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 4 + } + }, + { + "memory": 1112545408, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 4, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1114970240, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 4, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1117395072, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 4, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1119819904, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 4, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1122244736, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 4, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1122900096, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 4, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1125324928, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 4, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1125980288, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 4, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1126193280, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 4, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1126848640, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 4, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1129273472, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 4, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1129928832, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 4, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1132353664, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 4, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1134778496, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 4, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1137203328, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 4, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1137858688, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 4, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1140283520, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 4, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1142708352, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 4, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1145133184, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 4, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1147558016, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 4, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1149982848, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 4, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1152407680, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 4, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1154832512, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 4, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1155487872, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 4, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1157912704, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 4, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1160337536, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 4, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1162762368, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 4, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1162975360, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 4, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1163630720, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 4, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1166055552, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 4, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1168480384, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 4, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1170905216, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 4, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1173330048, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 4, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1175754880, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 4, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1176410240, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 4, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1178835072, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 4, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1181259904, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 4, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1181472896, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 4, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1183897728, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 4, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1186322560, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 4, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1186977920, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 4, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1187190912, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 4, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1187403904, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 4, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1189828736, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 4, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1192253568, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 4, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1194678400, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 4, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1197103232, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 4, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1199528064, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 4, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1199741056, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 4, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1202165888, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 4, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1204590720, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 4, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1207015552, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 4, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1207670912, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 4, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1210095744, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 4, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1210751104, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 4, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1211406464, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 4, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1211619456, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 4, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1211832448, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 4, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1212045440, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 4, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1214470272, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 4, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1216895104, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 4, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1217550464, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 4, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1217763456, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 4, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1217976448, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 4, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1218631808, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 4, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1221056640, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 4, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1221269632, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 4, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1221482624, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 4, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1221695616, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 4, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1222350976, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 4, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1222563968, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 4, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1223219328, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 4, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1223874688, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 4, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1226299520, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 4, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1226954880, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 4, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1227610240, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 4, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1228265600, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 4, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1230690432, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 4, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1233115264, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 4, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1233328256, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 4, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1233983616, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 4, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1234638976, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 4, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1237063808, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 4, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1237276800, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 4, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1237489792, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 4, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1237702784, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 4, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1238358144, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 4, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1238571136, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 4, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1238784128, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 4, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1239439488, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 4, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1240094848, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 4, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1240307840, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 4, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1240520832, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 4, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1240733824, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 4, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1241389184, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 4, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1242044544, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 4, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1242257536, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 4, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1242470528, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 4, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1243125888, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 4, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1243781248, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 4, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1243994240, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 4, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1244649600, + "layers": { + "layer.0.self_attn.q_proj": 4, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1245304960, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 4, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1245517952, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 4, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1245730944, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 4, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1245943936, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 4, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1246156928, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 4, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1246369920, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 5, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1248794752, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 5, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1251219584, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 5, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1251874944, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 5, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1254299776, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 5, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1256724608, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 5, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1259149440, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 5, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1261574272, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 5 + } + }, + { + "memory": 1263999104, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 5, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1266423936, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 5, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1268848768, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 5, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1271273600, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 5, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1273698432, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 5, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1274353792, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 5, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1276778624, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 5, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1279203456, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 5, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1279416448, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 5, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1281841280, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 5, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1284266112, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 5, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1286690944, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 5, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1289115776, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 5, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1291540608, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 5, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1292195968, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 5, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1292851328, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 5, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1295276160, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 5, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1297700992, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 5, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1300125824, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 5, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1302550656, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 5, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1303206016, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 5, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1305630848, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 5, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1308055680, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 5, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1310480512, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 5, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1312905344, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 5, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1315330176, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 5, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1315985536, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 5, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1316198528, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 5, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1318623360, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 5, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1318836352, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 5, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1321261184, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 5, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1321916544, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 5, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1322129536, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 5, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1322784896, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 5, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1322997888, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 5, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1325422720, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 5, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1325635712, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 5, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1328060544, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 5, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1328273536, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 5, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1328928896, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 5, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1329584256, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 5, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1330239616, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 5, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1332664448, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 5, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1335089280, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 5, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1337514112, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 5, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1338169472, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 5, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1338824832, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 5, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1339037824, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 5, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1341462656, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 5, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1343887488, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 5, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1344100480, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 5, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1346525312, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 5, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1346738304, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 5, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1349163136, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 5, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1349818496, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 5, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1352243328, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 5, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1354668160, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 5, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1357092992, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 5, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1357305984, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 5, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1359730816, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 5, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1359943808, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 5, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1360599168, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 5, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1361254528, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 5, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1361909888, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 5, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1362122880, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 5, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1362335872, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 5, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1362548864, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 5, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1362761856, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 5, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1363417216, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 5, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1363630208, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 5, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1363843200, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 5, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1366268032, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 5, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1366923392, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 5, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1367136384, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 5, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1367349376, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 5, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1367562368, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 5, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1369987200, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 5, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1370642560, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 5, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1370855552, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 5, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1373280384, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 5, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1373935744, + "layers": { + "layer.0.self_attn.q_proj": 5, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1374591104, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 5, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1374804096, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 5, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1375459456, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 5, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1377884288, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 5, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1378539648, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 5, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1379195008, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 5, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1381619840, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 5, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1382275200, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 5, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1382488192, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 5, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1383143552, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 5, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1383798912, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 5, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1384454272, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 5, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1385109632, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 5, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1385322624, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 5, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1385535616, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 5, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1385748608, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 5, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1386403968, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 5, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1386616960, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 5, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1386829952, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 5, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1387485312, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 5, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1389910144, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 5, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1390123136, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 5, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1390336128, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 5, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1390549120, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 6, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1392973952, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 6, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1395398784, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 6, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1396054144, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 6, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1398478976, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 6, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1399134336, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 6, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1401559168, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 6, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1403984000, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 6 + } + }, + { + "memory": 1406408832, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 6, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1408833664, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 6, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1411258496, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 6, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1413683328, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 6, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1416108160, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 6, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1418532992, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 6, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1420957824, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 6, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1423382656, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 6, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1425807488, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 6, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1428232320, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 6, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1430657152, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 6, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1433081984, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 6, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1435506816, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 6, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1435719808, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 6, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1438144640, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 6, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1440569472, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 6, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1440782464, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 6, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1443207296, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 6, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1445632128, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 6, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1448056960, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 6, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1450481792, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 6, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1451137152, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 6, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1453561984, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 6, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1455986816, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 6, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1458411648, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 6, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1460836480, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 6, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1463261312, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 6, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1465686144, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 6, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1468110976, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 6, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1470535808, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 6, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1472960640, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 6, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1475385472, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 6, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1477810304, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 6, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1480235136, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 6, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1480890496, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 6, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1483315328, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 6, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1483528320, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 6, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1483741312, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 6, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1486166144, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 6, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1486821504, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 6, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1489246336, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 6, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1489901696, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 6, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1490114688, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 6, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1490327680, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 6, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1492752512, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 6, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1492965504, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 6, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1493620864, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 6, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1493833856, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 6, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1496258688, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 6, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1498683520, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 6, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1501108352, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 6, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1503533184, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 6, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1504188544, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 6, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1504843904, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 6, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1505056896, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 6, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1505712256, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 6, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1505925248, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 6, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1506580608, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 6, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1507235968, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 6, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1507891328, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 6, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1508104320, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 6, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1508759680, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 6, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1508972672, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 6, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1509628032, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 6, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1512052864, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 6, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1512708224, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 6, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1515133056, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 6, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1515788416, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 6, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1516443776, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 6, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1516656768, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 6, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1519081600, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 6, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1519736960, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 6, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1520392320, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 6, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1521047680, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 6, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1521703040, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 6, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1522358400, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 6, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1523013760, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 6, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1523669120, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 6, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1523882112, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 6, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1524537472, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 6, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1524750464, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 6, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1524963456, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 6, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1525176448, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 6, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1525831808, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 6, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1526044800, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 6, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1526257792, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 6, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1526470784, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 6, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1527126144, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 6, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1527781504, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 6, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1527994496, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 6, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1528649856, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 6, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1529305216, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 6, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1529518208, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 6, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1529731200, + "layers": { + "layer.0.self_attn.q_proj": 6, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1530386560, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 6, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1530599552, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 6, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1530812544, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 6, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1533237376, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 6, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1533450368, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 6, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1533663360, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 6, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1533876352, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 6, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1534089344, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 6, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1534302336, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 6, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1534515328, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 6, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1534728320, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 7, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1537153152, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 7, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1539577984, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 7 + } + }, + { + "memory": 1542002816, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 7, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1544427648, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 7, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1546852480, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 7, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1549277312, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 7, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1551702144, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 7, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1552357504, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 7, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1554782336, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 7, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1557207168, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 7, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1559632000, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 7, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1562056832, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 7, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1564481664, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 7, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1566906496, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 7, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1569331328, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 7, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1571756160, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 7, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1572411520, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 7, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1574836352, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 7, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1577261184, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 7, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1579686016, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 7, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1582110848, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 7, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1584535680, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 7, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1586960512, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 7, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1587173504, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 7, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1589598336, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 7, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1590253696, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 7, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1592678528, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 7, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1593333888, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 7, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1595758720, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 7, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1595971712, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 7, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1598396544, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 7, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1600821376, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 7, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1603246208, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 7, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1605671040, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 7, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1605884032, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 7, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1606539392, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 7, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1608964224, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 7, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1611389056, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 7, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1611602048, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 7, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1614026880, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 7, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1616451712, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 7, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1618876544, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 7, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1619531904, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 7, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1621956736, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 7, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1622612096, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 7, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1623267456, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 7, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1623922816, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 7, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1624578176, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 7, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1627003008, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 7, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1627658368, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 7, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1627871360, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 7, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1628084352, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 7, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1628297344, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 7, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1628952704, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 7, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1631377536, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 7, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1633802368, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 7, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1634015360, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 7, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1634228352, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 7, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1636653184, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 7, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1639078016, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 7, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1639291008, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 7, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1639504000, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 7, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1639716992, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 7, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1640372352, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 7, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1642797184, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 7, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1645222016, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 7, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1645877376, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 7, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1646090368, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 7, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1646745728, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 7, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1649170560, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 7, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1649825920, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 7, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1650038912, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 7, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1650251904, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 7, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1650464896, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 7, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1650677888, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 7, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1651333248, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 7, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1653758080, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 7, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1654413440, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 7, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1655068800, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 7, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1655281792, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 7, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1657706624, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 7, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1657919616, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 7, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1658132608, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 7, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1658787968, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 7, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1659000960, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 7, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1659213952, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 7, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1659869312, + "layers": { + "layer.0.self_attn.q_proj": 7, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1660524672, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 7, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1661180032, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 7, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1661393024, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 7, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1661606016, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 7, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1662261376, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 7, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1662916736, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 7, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1663129728, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 7, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1663342720, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 7, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1665767552, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 7, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1666422912, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 7, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1667078272, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 7, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1667733632, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 7, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1667946624, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 7, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1668601984, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 7, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1669257344, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 7, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1669470336, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 7, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1669683328, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 7, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1672108160, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 7, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1672321152, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 7, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1674745984, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 7, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1675401344, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 7, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1675614336, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 7, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1678039168, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 7, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1678694528, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 7, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 8, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1678907520, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 8, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 8, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1681332352, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 8, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 8 + } + }, + { + "memory": 1683757184, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 8, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 8, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1686182016, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 8, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1686837376, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 8, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1689262208, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 8, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1691687040, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 8, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1694111872, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 8, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1696536704, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 8, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1696749696, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 8, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1697405056, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 8, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1699829888, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 8, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1702254720, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 8, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1704679552, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 8, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1707104384, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 8, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1709529216, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 8, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1711954048, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 8, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1714378880, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 8, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1716803712, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 8, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1719228544, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 8, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1721653376, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 8, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1724078208, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 8, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1726503040, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 8, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1728927872, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 8, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1729140864, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 8, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1729796224, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 8, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1732221056, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 8, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1734645888, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 8, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1737070720, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 8, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1737726080, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 8, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1740150912, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 8, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1742575744, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 8, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1745000576, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 8, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1747425408, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 8, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1749850240, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 8, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1750063232, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 8, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1752488064, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 8, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1754912896, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 8, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1757337728, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 8, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1759762560, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 8, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1762187392, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 8, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1762400384, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 8, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1764825216, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 8, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1765480576, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 8, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1766135936, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 8, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1766791296, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 8, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1769216128, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 8, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1771640960, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 8, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1771853952, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 8, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1774278784, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 8, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1774934144, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 8, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1777358976, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 8, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1779783808, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 8, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1779996800, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 8, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1780209792, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 8, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1780865152, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 8, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1781078144, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 8, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1783502976, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 8, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1785927808, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 8, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1786583168, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 8, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1789008000, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 8, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1789220992, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 8, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1789433984, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 8, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1789646976, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 8, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1789859968, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 8, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1790072960, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 8, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1790728320, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 8, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1793153152, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 8, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1793808512, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 8, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1796233344, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 8, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1796888704, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 8, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1797101696, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 8, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1799526528, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 8, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1801951360, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 8, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1802606720, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 8, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1803262080, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 8, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1803475072, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 8, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1804130432, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 8, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1804343424, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 8, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1804998784, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 8, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1805654144, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 8, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1808078976, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 8, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1808291968, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 8, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1810716800, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 8, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1811372160, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 8, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1811585152, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 8, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1811798144, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 8, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1812011136, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 8, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1812666496, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 8, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1813321856, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 8, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1813977216, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 8, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1814632576, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 8, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1815287936, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 8, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1815500928, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 8, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1815713920, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 8, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1816369280, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 8, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1817024640, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 8, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1817680000, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 8, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1818335360, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 8, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1818548352, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 8, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1819203712, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 8, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1819859072, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 8, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1820072064, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 8, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1820285056, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 8, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1820940416, + "layers": { + "layer.0.self_attn.q_proj": 8, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1821595776, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 8, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1821808768, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 8, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1822021760, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 8, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1822234752, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 8, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1822447744, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 8, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1822660736, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 8, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1822873728, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 8, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1823086720, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 9, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1823299712, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 9, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1825724544, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 9, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1828149376, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 9, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1830574208, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 9 + } + }, + { + "memory": 1832999040, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 9, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1833212032, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 9, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1835636864, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 9, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1838061696, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 9, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1838717056, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 9, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1841141888, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 9, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1843566720, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 9, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1845991552, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 9, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1848416384, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 9, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1850841216, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 9, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1853266048, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 9, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1855690880, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 9, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1855903872, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 9, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1856559232, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 9, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1858984064, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 9, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1861408896, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 9, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1863833728, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 9, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1866258560, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 9, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1868683392, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 9, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1871108224, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 9, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1873533056, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 9, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1875957888, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 9, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1876613248, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 9, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1879038080, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 9, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1879693440, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 9, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1882118272, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 9, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1884543104, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 9, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1886967936, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 9, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1889392768, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 9, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1891817600, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 9, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1892030592, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 9, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1894455424, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 9, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1896880256, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 9, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1897093248, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 9, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1897306240, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 9, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1899731072, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 9, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1902155904, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 9, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1904580736, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 9, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1907005568, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 9, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1907660928, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 9, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1910085760, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 9, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1910298752, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 9, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1912723584, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 9, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1913378944, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 9, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1914034304, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 9, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1916459136, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 9, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1918883968, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 9, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1919539328, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 9, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1920194688, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 9, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1922619520, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 9, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1925044352, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 9, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1927469184, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 9, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1927682176, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 9, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1928337536, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 9, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1928992896, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 9, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1929648256, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 9, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1930303616, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 9, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1932728448, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 9, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1932941440, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 9, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1933596800, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 9, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1936021632, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 9, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1936676992, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 9, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1936889984, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 9, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1937545344, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 9, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1939970176, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 9, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1940183168, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 9, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1940396160, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 9, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1941051520, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 9, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1943476352, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 9, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1944131712, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 9, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1944787072, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 9, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1945000064, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 9, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1945213056, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 9, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1947637888, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 9, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1948293248, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 9, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1948506240, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 9, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1949161600, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 9, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1949374592, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 9, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1949587584, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 9, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1949800576, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 9, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1950013568, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 9, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1950668928, + "layers": { + "layer.0.self_attn.q_proj": 9, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1951324288, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 9, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1951537280, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 9, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1952192640, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 9, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1952848000, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 9, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1953503360, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 9, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1954158720, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 9, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1954371712, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 9, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1955027072, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 9, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1955682432, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 9, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1956337792, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 9, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1956993152, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 9, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1959417984, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 9, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1959630976, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 9, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1959843968, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 9, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1960056960, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 9, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1960269952, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 9, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1960482944, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 9, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1960695936, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 9, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1963120768, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 9, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1963776128, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 9, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1966200960, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 9, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1966413952, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 9, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1966626944, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 9, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1966839936, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 9, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1967052928, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 9, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1967265920, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 10, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1969690752, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 10, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1972115584, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 10 + } + }, + { + "memory": 1974540416, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 10, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1974753408, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 10, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1977178240, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 10, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1977391232, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 10, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1979816064, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 10, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1982240896, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 10, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1984665728, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 10, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1984878720, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 10, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1987303552, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 10, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1989728384, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 10, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1992153216, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 10, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1994578048, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 10, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1997002880, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 10, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 1999427712, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 10, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2001852544, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 10, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2004277376, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 10, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2006702208, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 10, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2009127040, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 10, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2011551872, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 10, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2013976704, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 10, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2014632064, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 10, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2015287424, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 10, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2017712256, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 10, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2020137088, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 10, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2022561920, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 10, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2023217280, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 10, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2025642112, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 10, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2028066944, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 10, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2030491776, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 10, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2030704768, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 10, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2033129600, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 10, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2033784960, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 10, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2036209792, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 10, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2038634624, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 10, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2041059456, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 10, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2043484288, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 10, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2045909120, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 10, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2046122112, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 10, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2048546944, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 10, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2050971776, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 10, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2053396608, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 10, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2054051968, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 10, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2054264960, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 10, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2054477952, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 10, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2055133312, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 10, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2057558144, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 10, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2058213504, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 10, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2060638336, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 10, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2061293696, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 10, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2061949056, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 10, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2062604416, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 10, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2065029248, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 10, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2067454080, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 10, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2068109440, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 10, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2070534272, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 10, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2070747264, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 10, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2070960256, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 10, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2073385088, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 10, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2075809920, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 10, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2078234752, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 10, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2078890112, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 10, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2079103104, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 10, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2079758464, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 10, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2079971456, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 10, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2080626816, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 10, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2081282176, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 10, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2081495168, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 10, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2081708160, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 10, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2081921152, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 10, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2082134144, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 10, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2082347136, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 10, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2082560128, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 10, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2082773120, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 10, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2082986112, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 10, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2083641472, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 10, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2083854464, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 10, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2086279296, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 10, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2086934656, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 10, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2087590016, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 10, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2090014848, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 10, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2090670208, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 10, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2093095040, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 10, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2093750400, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 10, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2094405760, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 10, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2095061120, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 10, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2097485952, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 10, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2097698944, + "layers": { + "layer.0.self_attn.q_proj": 10, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2098354304, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 10, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2098567296, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 10, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2098780288, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 10, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2099435648, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 10, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2099648640, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 10, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2099861632, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 10, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2100516992, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 10, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2101172352, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 10, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2101827712, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 10, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2102040704, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 10, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2102696064, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 10, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2105120896, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 10, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2105776256, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 10, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2105989248, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 10, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2106202240, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 10, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2106857600, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 10, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2107070592, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 10, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2107283584, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 10, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2109708416, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 10, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2109921408, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 10, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2110134400, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 10, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2110789760, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 10, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2111445120, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 11, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2113869952, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 11, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2114082944, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 11, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2116507776, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 11, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2118932608, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 11, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2121357440, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 11, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2123782272, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 11, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2126207104, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 11, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2128631936, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 11, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2131056768, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 11, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2131712128, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 11, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2134136960, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 11, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2136561792, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 11, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2138986624, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 11, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2139199616, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 11, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2141624448, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 11 + } + }, + { + "memory": 2144049280, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 11, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2146474112, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 11, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2148898944, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 11, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2151323776, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 11, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2151979136, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 11, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2154403968, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 11, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2154616960, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 11, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2155272320, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 11, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2157697152, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 11, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2160121984, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 11, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2162546816, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 11, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2164971648, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 11, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2167396480, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 11, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2169821312, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 11, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2172246144, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 11, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2172901504, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 11, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2175326336, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 11, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2175981696, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 11, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2176637056, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 11, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2179061888, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 11, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2179717248, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 11, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2180372608, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 11, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2180585600, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 11, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2183010432, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 11, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2183665792, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 11, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2184321152, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 11, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2186745984, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 11, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2186958976, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 11, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2189383808, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 11, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2189596800, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 11, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2192021632, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 11, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2194446464, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 11, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2196871296, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 11, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2199296128, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 11, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2201720960, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 11, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2204145792, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 11, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2206570624, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 11, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2208995456, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 11, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2209208448, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 11, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2209421440, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 11, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2209634432, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 11, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2209847424, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 11, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2212272256, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 11, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2212485248, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 11, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2213140608, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 11, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2215565440, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 11, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2217990272, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 11, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2218203264, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 11, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2220628096, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 11, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2221283456, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 11, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2221496448, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 11, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2221709440, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 11, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2222364800, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 11, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2223020160, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 11, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2223675520, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 11, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2223888512, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 11, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2224101504, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 11, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2224756864, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 11, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2227181696, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 11, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2227837056, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 11, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2228492416, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 11, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2229147776, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 11, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2229803136, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 11, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2230458496, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 11, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2231113856, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 11, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2231326848, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 11, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2231982208, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 11, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2232637568, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 11, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2232850560, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 11, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2235275392, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 11, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2235930752, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 11, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2238355584, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 11, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2240780416, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 11, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2240993408, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 11, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2241206400, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 11, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2241861760, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 11, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2244286592, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 11, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2244499584, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 11, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2244712576, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 11, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2245367936, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 11, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2247792768, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 11, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2248005760, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 11, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2248661120, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 11, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2248874112, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 11, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2249087104, + "layers": { + "layer.0.self_attn.q_proj": 11, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2249742464, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 11, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2250397824, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 11, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2250610816, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 11, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2251266176, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 11, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2253691008, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 11, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2253904000, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 11, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2254116992, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 11, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2254329984, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 11, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2254542976, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 11, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2254755968, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 11, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2255411328, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 11, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2255624320, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 12, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 12, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2258049152, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 12, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2258262144, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 12, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2260686976, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 12 + } + }, + { + "memory": 2263111808, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 12, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2265536640, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 12, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2267961472, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 12, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2270386304, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 12, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2272811136, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 12, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2273024128, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 12, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2275448960, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 12, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2277873792, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 12, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2280298624, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 12, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2282723456, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 12, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2285148288, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 12, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2287573120, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 12, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2289997952, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 12, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2290653312, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 12, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2293078144, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 12, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2293733504, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 12, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2296158336, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 12, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2298583168, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 12, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2298796160, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 12, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2301220992, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 12, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2303645824, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 12, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2306070656, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 12, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2308495488, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 12, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2310920320, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 12, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2313345152, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 12, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2314000512, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 12, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2316425344, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 12, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2318850176, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 12, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2321275008, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 12, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2321930368, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 12, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2322143360, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 12, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2324568192, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 12, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2324781184, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 12, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2327206016, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 12, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2327419008, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 12, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2329843840, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 12, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2330499200, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 12, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2332924032, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 12, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2333579392, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 12, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2333792384, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 12, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2336217216, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 12, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2336872576, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 12, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2337527936, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 12, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2338183296, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 12, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2338838656, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 12, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2341263488, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 12, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2341476480, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 12, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2343901312, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 12, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2344556672, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 12, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2344769664, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 12, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2345425024, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 12, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2347849856, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 12, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2348062848, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 12, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2348718208, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 12, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2349373568, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 12, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2350028928, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 12, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2352453760, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 12, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2354878592, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 12, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2355091584, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 12, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2357516416, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 12, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2357729408, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 12, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2357942400, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 12, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2358597760, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 12, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2359253120, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 12, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2359466112, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 12, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2361890944, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 12, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2362103936, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 12, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2362316928, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 12, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2364741760, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 12, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2365397120, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 12, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2366052480, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 12, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2366265472, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 12, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2368690304, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 12, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2371115136, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 12, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2371770496, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 12, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2371983488, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 12, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2374408320, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 12, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2374621312, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 12, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2377046144, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 12, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2377701504, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 12, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2380126336, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 12, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2380781696, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 12, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2380994688, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 12, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2381650048, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 12, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2382305408, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 12, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2382518400, + "layers": { + "layer.0.self_attn.q_proj": 12, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2383173760, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 12, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2383829120, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 12, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2384484480, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 12, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2384697472, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 12, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2384910464, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 12, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2385565824, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 12, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2387990656, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 12, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2388203648, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 12, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2388416640, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 12, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2388629632, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 12, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2389284992, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 12, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2389940352, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 12, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2392365184, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 12, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2393020544, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 12, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2395445376, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 12, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2395658368, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 12, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2395871360, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 12, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2396084352, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 12, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2396297344, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 12, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2398722176, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 12, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2398935168, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 12, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2399148160, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 12, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2399803520, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 13, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2402228352, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 13, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2404653184, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 13, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2407078016, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 13, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2407733376, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 13, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2410158208, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 13, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2412583040, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 13, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2415007872, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 13, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2417432704, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 13, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2419857536, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 13, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2422282368, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 13, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2424707200, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 13, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2427132032, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 13, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2429556864, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 13, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2429769856, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 13, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2432194688, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 13, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2434619520, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 13, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2437044352, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 13, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2439469184, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 13, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2441894016, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 13, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2444318848, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 13, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2444974208, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 13, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2447399040, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 13, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2449823872, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 13 + } + }, + { + "memory": 2452248704, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 13, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2454673536, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 13, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2457098368, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 13, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2459523200, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 13, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2460178560, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 13, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2460833920, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 13, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2461046912, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 13, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2461259904, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 13, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2463684736, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 13, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2464340096, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 13, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2464995456, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 13, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2467420288, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 13, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2469845120, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 13, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2472269952, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 13, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2472482944, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 13, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2473138304, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 13, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2475563136, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 13, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2477987968, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 13, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2480412800, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 13, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2482837632, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 13, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2483492992, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 13, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2483705984, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 13, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2483918976, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 13, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2486343808, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 13, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2486999168, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 13, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2487654528, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 13, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2487867520, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 13, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2488522880, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 13, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2488735872, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 13, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2488948864, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 13, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2491373696, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 13, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2493798528, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 13, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2496223360, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 13, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2496436352, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 13, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2497091712, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 13, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2497304704, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 13, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2499729536, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 13, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2499942528, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 13, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2502367360, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 13, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2503022720, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 13, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2503235712, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 13, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2505660544, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 13, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2506315904, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 13, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2506971264, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 13, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2507184256, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 13, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2507839616, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 13, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2508052608, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 13, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2508265600, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 13, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2510690432, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 13, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2510903424, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 13, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2511558784, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 13, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2511771776, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 13, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2512427136, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 13, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2513082496, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 13, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2513295488, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 13, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2513950848, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 13, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2516375680, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 13, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2518800512, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 13, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2519455872, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 13, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2519668864, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 13, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2520324224, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 13, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2520537216, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 13, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2521192576, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 13, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2521847936, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 13, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2522503296, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 13, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2524928128, + "layers": { + "layer.0.self_attn.q_proj": 13, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2525583488, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 13, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2525796480, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 13, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2528221312, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 13, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2528876672, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 13, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2529532032, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 13, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2531956864, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 13, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2532612224, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 13, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2532825216, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 13, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2533038208, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 13, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2533251200, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 13, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2533464192, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 13, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2534119552, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 13, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2534332544, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 13, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2534545536, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 13, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2535200896, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 13, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2535413888, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 13, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2537838720, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 13, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2538051712, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 13, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2538264704, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 13, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2538920064, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 13, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2541344896, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 13, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2543769728, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 14, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 13, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2543982720, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 14, + "layer.1.mlp.down_proj": 14, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2546407552, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 14, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 14, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2548832384, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 14, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2551257216, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 14 + } + }, + { + "memory": 2553682048, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 14, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2556106880, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 14, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2556319872, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 14, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2558744704, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 14, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2561169536, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 14, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2563594368, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 14, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2566019200, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 14, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2568444032, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 14, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2569099392, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 14, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2571524224, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 14, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2573949056, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 14, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2576373888, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 14, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2577029248, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 14, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2579454080, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 14, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2581878912, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 14, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2584303744, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 14, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2584516736, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 14, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2584729728, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 14, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2587154560, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 14, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2589579392, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 14, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2592004224, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 14, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2594429056, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 14, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2595084416, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 14, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2597509248, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 14, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2599934080, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 14, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2600589440, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 14, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2603014272, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 14, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2605439104, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 14, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2607863936, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 14, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2608076928, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 14, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2608732288, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 14, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2611157120, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 14, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2611812480, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 14, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2612025472, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 14, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2614450304, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 14, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2616875136, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 14, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2617088128, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 14, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2617743488, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 14, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2618398848, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 14, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2620823680, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 14, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2623248512, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 14, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2623903872, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 14, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2624116864, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 14, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2624329856, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 14, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2624542848, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 14, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2625198208, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 14, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2625411200, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 14, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2626066560, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 14, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2626721920, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 14, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2627377280, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 14, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2629802112, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 14, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2632226944, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 14, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2632439936, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 14, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2633095296, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 14, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2635520128, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 14, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2636175488, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 14, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2638600320, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 14, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2641025152, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 14, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2641238144, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 14, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2641451136, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 14, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2642106496, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 14, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2642761856, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 14, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2643417216, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 14, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2645842048, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 14, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2646055040, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 14, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2648479872, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 14, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2648692864, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 14, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2648905856, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 14, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2649561216, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 14, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2649774208, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 14, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2650429568, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 14, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2650642560, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 14, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2650855552, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 14, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2653280384, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 14, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2653493376, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 14, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2653706368, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 14, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2654361728, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 14, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2656786560, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 14, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2657441920, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 14, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2657654912, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 14, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2660079744, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 14, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2662504576, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 14, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2663159936, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 14, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2663372928, + "layers": { + "layer.0.self_attn.q_proj": 14, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2664028288, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 14, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2664241280, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 14, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2664896640, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 14, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2665109632, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 14, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2665322624, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 14, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2667747456, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 14, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2670172288, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 14, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2672597120, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 14, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2673252480, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 14, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2673907840, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 14, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2674563200, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 14, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2675218560, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 14, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2675431552, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 14, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2676086912, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 14, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2678511744, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 14, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2680936576, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 14, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2683361408, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 14, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2684016768, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 14, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2684229760, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 14, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2684442752, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 14, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2686867584, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 14, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2687080576, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 14, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2687293568, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 14, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2687948928, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 15, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 14, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2688161920, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 15, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 15, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2690586752, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 15, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 15, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2693011584, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 15, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2693666944, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 15, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2696091776, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 15, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2698516608, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 15, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2700941440, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 15, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2703366272, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 15, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2705791104, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 15, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2708215936, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 15, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2708428928, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 15, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2710853760, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 15, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2713278592, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 15, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2713933952, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 15, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2716358784, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 15, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2718783616, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 15, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2721208448, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 15 + } + }, + { + "memory": 2723633280, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 15, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2723846272, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 15, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2726271104, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 15, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2728695936, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 15, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2731120768, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 15, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2733545600, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 15, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2735970432, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 15, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2738395264, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 15, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2740820096, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 15, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2741033088, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 15, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2741688448, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 15, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2744113280, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 15, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2746538112, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 15, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2748962944, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 15, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2751387776, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 15, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2753812608, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 15, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2756237440, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 15, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2756450432, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 15, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2756663424, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 15, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2759088256, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 15, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2759743616, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 15, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2762168448, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 15, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2762381440, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 15, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2763036800, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 15, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2765461632, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 15, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2767886464, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 15, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2770311296, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 15, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2772736128, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 15, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2775160960, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 15, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2777585792, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 15, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2780010624, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 15, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2780665984, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 15, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2781321344, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 15, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2783746176, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 15, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2783959168, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 15, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2786384000, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 15, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2788808832, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 15, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2789021824, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 15, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2791446656, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 15, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2791659648, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 15, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2792315008, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 15, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2792528000, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 15, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2794952832, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 15, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2795165824, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 15, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2795378816, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 15, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2796034176, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 15, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2796689536, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 15, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2799114368, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 15, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2799327360, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 15, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2799982720, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 15, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2800638080, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 15, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2801293440, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 15, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2801506432, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 15, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2801719424, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 15, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2802374784, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 15, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2803030144, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 15, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2805454976, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 15, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2805667968, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 15, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2806323328, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 15, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2806536320, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 15, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2807191680, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 15, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2809616512, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 15, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2809829504, + "layers": { + "layer.0.self_attn.q_proj": 15, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2810484864, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 15, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2810697856, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 15, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2811353216, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 15, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2812008576, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 15, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2812221568, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 15, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2812434560, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 15, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2813089920, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 15, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2813302912, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 15, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2813958272, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 15, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2814613632, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 15, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2815268992, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 15, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2815481984, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 15, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2817906816, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 15, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2818562176, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 15, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2818775168, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 15, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2819430528, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 15, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2820085888, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 15, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2820298880, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 15, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2820511872, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 15, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2821167232, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 15, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2821380224, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 15, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2822035584, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 15, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2822248576, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 15, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2822461568, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 15, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2823116928, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 15, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2823329920, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 15, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2823985280, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 15, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2826410112, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 15, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2826623104, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 15, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 16, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2826836096, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 16, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 15, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 16, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2829260928, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 16, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 16, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 15, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 16, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2829916288, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 15, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 16, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 16, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 16, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 16, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + }, + { + "memory": 2832341120, + "layers": { + "layer.0.self_attn.q_proj": 16, + "layer.0.self_attn.k_proj": 16, + "layer.0.self_attn.v_proj": 16, + "layer.0.self_attn.o_proj": 16, + "layer.0.mlp.gate_proj": 16, + "layer.0.mlp.up_proj": 16, + "layer.0.mlp.down_proj": 16, + "layer.1.self_attn.q_proj": 16, + "layer.1.self_attn.k_proj": 16, + "layer.1.self_attn.v_proj": 16, + "layer.1.self_attn.o_proj": 16, + "layer.1.mlp.gate_proj": 16, + "layer.1.mlp.up_proj": 16, + "layer.1.mlp.down_proj": 16, + "layer.2.self_attn.q_proj": 16, + "layer.2.self_attn.k_proj": 16, + "layer.2.self_attn.v_proj": 16, + "layer.2.self_attn.o_proj": 16, + "layer.2.mlp.gate_proj": 16, + "layer.2.mlp.up_proj": 16, + "layer.2.mlp.down_proj": 16, + "layer.3.self_attn.q_proj": 16, + "layer.3.self_attn.k_proj": 16, + "layer.3.self_attn.v_proj": 16, + "layer.3.self_attn.o_proj": 16, + "layer.3.mlp.gate_proj": 16, + "layer.3.mlp.up_proj": 16, + "layer.3.mlp.down_proj": 16, + "layer.4.self_attn.q_proj": 16, + "layer.4.self_attn.k_proj": 16, + "layer.4.self_attn.v_proj": 16, + "layer.4.self_attn.o_proj": 16, + "layer.4.mlp.gate_proj": 16, + "layer.4.mlp.up_proj": 16, + "layer.4.mlp.down_proj": 16, + "layer.5.self_attn.q_proj": 16, + "layer.5.self_attn.k_proj": 16, + "layer.5.self_attn.v_proj": 16, + "layer.5.self_attn.o_proj": 16, + "layer.5.mlp.gate_proj": 16, + "layer.5.mlp.up_proj": 16, + "layer.5.mlp.down_proj": 16, + "layer.6.self_attn.q_proj": 16, + "layer.6.self_attn.k_proj": 16, + "layer.6.self_attn.v_proj": 16, + "layer.6.self_attn.o_proj": 16, + "layer.6.mlp.gate_proj": 16, + "layer.6.mlp.up_proj": 16, + "layer.6.mlp.down_proj": 16, + "layer.7.self_attn.q_proj": 16, + "layer.7.self_attn.k_proj": 16, + "layer.7.self_attn.v_proj": 16, + "layer.7.self_attn.o_proj": 16, + "layer.7.mlp.gate_proj": 16, + "layer.7.mlp.up_proj": 16, + "layer.7.mlp.down_proj": 16, + "layer.8.self_attn.q_proj": 16, + "layer.8.self_attn.k_proj": 16, + "layer.8.self_attn.v_proj": 16, + "layer.8.self_attn.o_proj": 16, + "layer.8.mlp.gate_proj": 16, + "layer.8.mlp.up_proj": 16, + "layer.8.mlp.down_proj": 16, + "layer.9.self_attn.q_proj": 16, + "layer.9.self_attn.k_proj": 16, + "layer.9.self_attn.v_proj": 16, + "layer.9.self_attn.o_proj": 16, + "layer.9.mlp.gate_proj": 16, + "layer.9.mlp.up_proj": 16, + "layer.9.mlp.down_proj": 16, + "layer.10.self_attn.q_proj": 16, + "layer.10.self_attn.k_proj": 16, + "layer.10.self_attn.v_proj": 16, + "layer.10.self_attn.o_proj": 16, + "layer.10.mlp.gate_proj": 16, + "layer.10.mlp.up_proj": 16, + "layer.10.mlp.down_proj": 16, + "layer.11.self_attn.q_proj": 16, + "layer.11.self_attn.k_proj": 16, + "layer.11.self_attn.v_proj": 16, + "layer.11.self_attn.o_proj": 16, + "layer.11.mlp.gate_proj": 16, + "layer.11.mlp.up_proj": 16, + "layer.11.mlp.down_proj": 16, + "layer.12.self_attn.q_proj": 16, + "layer.12.self_attn.k_proj": 16, + "layer.12.self_attn.v_proj": 16, + "layer.12.self_attn.o_proj": 16, + "layer.12.mlp.gate_proj": 16, + "layer.12.mlp.up_proj": 16, + "layer.12.mlp.down_proj": 16, + "layer.13.self_attn.q_proj": 16, + "layer.13.self_attn.k_proj": 16, + "layer.13.self_attn.v_proj": 16, + "layer.13.self_attn.o_proj": 16, + "layer.13.mlp.gate_proj": 16, + "layer.13.mlp.up_proj": 16, + "layer.13.mlp.down_proj": 16, + "layer.14.self_attn.q_proj": 16, + "layer.14.self_attn.k_proj": 16, + "layer.14.self_attn.v_proj": 16, + "layer.14.self_attn.o_proj": 16, + "layer.14.mlp.gate_proj": 16, + "layer.14.mlp.up_proj": 16, + "layer.14.mlp.down_proj": 16, + "layer.15.self_attn.q_proj": 16, + "layer.15.self_attn.k_proj": 16, + "layer.15.self_attn.v_proj": 16, + "layer.15.self_attn.o_proj": 16, + "layer.15.mlp.gate_proj": 16, + "layer.15.mlp.up_proj": 16, + "layer.15.mlp.down_proj": 16 + } + } +] \ No newline at end of file