Bc-AI committed on
Commit
a42db8f
·
verified ·
1 Parent(s): dbec0a1

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -41,3 +41,6 @@ best_1/ocdbt.process_0/d/9f29e0b940fd9a6b36905f153e45f998 filter=lfs diff=lfs me
41
  checkpoints/best_1/ocdbt.process_0/d/63d8533980738b1eb4b132f935fca4fa filter=lfs diff=lfs merge=lfs -text
42
  checkpoints/best_1/ocdbt.process_0/d/7ecf433b033ee3ac9e35a2bb7516f0a1 filter=lfs diff=lfs merge=lfs -text
43
  checkpoints/best_1/ocdbt.process_0/d/9f29e0b940fd9a6b36905f153e45f998 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
41
  checkpoints/best_1/ocdbt.process_0/d/63d8533980738b1eb4b132f935fca4fa filter=lfs diff=lfs merge=lfs -text
42
  checkpoints/best_1/ocdbt.process_0/d/7ecf433b033ee3ac9e35a2bb7516f0a1 filter=lfs diff=lfs merge=lfs -text
43
  checkpoints/best_1/ocdbt.process_0/d/9f29e0b940fd9a6b36905f153e45f998 filter=lfs diff=lfs merge=lfs -text
44
+ checkpoints/best_3/ocdbt.process_0/d/b8d7939c37252e2b3d5a7e68ee214fc0 filter=lfs diff=lfs merge=lfs -text
45
+ checkpoints/best_3/ocdbt.process_0/d/e327a7eb4404a128ff897bb7a70e6ea1 filter=lfs diff=lfs merge=lfs -text
46
+ checkpoints/best_3/ocdbt.process_0/d/eaf2aaa36d4cb03aaec58caba6b74b86 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SAM1-600M
2
+
3
+ ## Chat Template
4
+ ```
5
+ User: {{input}}
6
+ Sam: {{output}}
7
+ ```
8
+
9
+ ## Model Stats
10
+ - Parameters: 348,357,632 (~348.4M, despite the "600M" in the model name)
11
+ - Architecture: 24L × 1024d × 16H
12
+ - Final Perplexity: 4.81
13
+ - Final Accuracy: 80.34%
14
+
15
+ ## Usage
16
+ ```python
17
+ from transformers import AutoTokenizer, AutoModelForCausalLM
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained("YOUR_USERNAME/sam1-600m")
20
+ model = AutoModelForCausalLM.from_pretrained("YOUR_USERNAME/sam1-600m")
21
+
22
+ prompt = "User: Hello!\nSam:"
23
+ inputs = tokenizer(prompt, return_tensors="pt")
24
+ outputs = model.generate(**inputs, max_length=100)
25
+ print(tokenizer.decode(outputs[0]))
26
+ ```
checkpoints/best_3/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1762419581477771668, "commit_timestamp_nsecs": 1762419594497402341, "custom_metadata": {}}
checkpoints/best_3/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('embed_tokens', 'embedding')": {"key_metadata": [{"key": "embed_tokens", "key_type": 2}, {"key": "embedding", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [50264, 1024]}}, "('layers_0', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_0', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_0', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_0', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_0', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_0', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": 
false, "write_shape": [2560, 1024]}}, "('layers_0', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_0', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_0', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_0", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_1', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_1', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_1', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_1', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", 
"key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_1', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_1', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_1', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_1', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_1', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_1", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_10', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_10', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, 
{"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_10', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_10', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_10', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_10', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_10', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_10', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_10', 'ffn_norm', 
'scale')": {"key_metadata": [{"key": "layers_10", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_11', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_11', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_11', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_11', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_11', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_11', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, 
"write_shape": [2560, 1024]}}, "('layers_11', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_11', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_11', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_11", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_12', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_12', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_12', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_12', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": 
"kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_12', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_12', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_12', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_12', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_12', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_12", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_13', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_13', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", 
"key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_13', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_13', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_13', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_13', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_13', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_13', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_13', 
'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_13", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_14', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_14', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_14', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_14', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_14', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_14', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": 
false, "write_shape": [2560, 1024]}}, "('layers_14', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_14', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_14', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_14", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_15', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_15', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_15', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_15', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, 
{"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_15', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_15', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_15', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_15', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_15', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_15", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_16', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_16', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": 
"attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_16', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_16', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_16', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_16', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_16', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_16', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, 
"('layers_16', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_16", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_17', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_17', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_17', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_17', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_17', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_17', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", 
"skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_17', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_17', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_17', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_17", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_18', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_18', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_18', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_18', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", 
"key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_18', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_18', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_18', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_18', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_18', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_18", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_19', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_19', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", 
"key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_19', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_19', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_19', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_19', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_19', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_19', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": 
[1024, 2560]}}, "('layers_19', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_19", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_2', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_2', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_2', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_2', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_2', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_2', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", 
"skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_2', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_2', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_2', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_2", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_20', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_20', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_20', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_20', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", 
"key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_20', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_20', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_20', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_20', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_20', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_20", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_21', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_21', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", 
"key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_21', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_21', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_21', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_21', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_21', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_21', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": 
[1024, 2560]}}, "('layers_21', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_21", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_22', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_22', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_22', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_22', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_22', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_22', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": 
"jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_22', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_22', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_22', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_22", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_23', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_23', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_23', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_23', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": 
"v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_23', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_23', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_23', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_23', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_23', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_23", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_3', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_3', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", 
"key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_3', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_3', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_3', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_3', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_3', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_3', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 
2560]}}, "('layers_3', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_3", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_4', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_4', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_4', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_4', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_4', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_4', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", 
"skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_4', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_4', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_4', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_4", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_5', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_5', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_5', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_5', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, 
{"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_5', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_5', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_5', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_5', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_5', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_5", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_6', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_6', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", 
"key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_6', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_6', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_6', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_6', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_6', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_6', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_6', 'ffn_norm', 
'scale')": {"key_metadata": [{"key": "layers_6", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_7', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_7', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_7', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_7', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_7', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_7', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": 
[2560, 1024]}}, "('layers_7', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_7', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_7', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_7", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_8', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_8', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_8', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_8', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], 
"value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_8', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_8', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_8', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_8', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_8', 'ffn_norm', 'scale')": {"key_metadata": [{"key": "layers_8", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_9', 'attn', 'k_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_9', 'attn', 'o_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "o_proj", 
"key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_9', 'attn', 'q_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 1024]}}, "('layers_9', 'attn', 'v_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 128]}}, "('layers_9', 'attn_norm', 'scale')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "attn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('layers_9', 'ffn', 'down_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "down_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2560, 1024]}}, "('layers_9', 'ffn', 'gate_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "gate_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_9', 'ffn', 'up_proj', 'kernel')": {"key_metadata": [{"key": "layers_9", "key_type": 2}, {"key": "ffn", "key_type": 2}, {"key": "up_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 2560]}}, "('layers_9', 'ffn_norm', 'scale')": {"key_metadata": 
[{"key": "layers_9", "key_type": 2}, {"key": "ffn_norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('lm_head', 'kernel')": {"key_metadata": [{"key": "lm_head", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 50264]}}, "('norm', 'scale')": {"key_metadata": [{"key": "norm", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
checkpoints/best_3/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ZW1iZWRfdG9rZW5zLmVtYmVkZGluZw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bG1faGVhZC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzAuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE0LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE1LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE2LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE3LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE4LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzE5LmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEwLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzExLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEyLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzEzLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIwLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIxLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIyLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmF0dG4ua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmF0dG4ub19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmF0dG4ucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmF0dG4udl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmF0dG5fbm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmZmbi51cF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmZmbi5kb3duX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmZmbi5nYXRlX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzIzLmZmbl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzMuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": 
\"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzQuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzUuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzYuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": 
\"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzcuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzguZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuYXR0bi52X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuYXR0bi5rX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuYXR0bi5vX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuYXR0bi5xX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": 
\"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuYXR0bl9ub3JtLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuZmZuLmRvd25fcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuZmZuLmdhdGVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuZmZuLnVwX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bGF5ZXJzXzkuZmZuX25vcm0uc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}","bm9ybS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TPU_0(process=0,(0,0,0,0))\"}"}
checkpoints/best_3/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "embed_tokens.embedding", "write_shape": [50264, 1024], "chunk_shape": [50264, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_0.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.attn.v_proj.kernel", "write_shape": [1024, 128], 
"chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_1.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_10.ffn_norm.scale", 
"write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_11.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": 
"layers_12.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_12.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_13.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": 
{"param_name": "layers_14.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_14.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": 
null}}, {"array_metadata": {"param_name": "layers_15.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_15.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_16.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": 
[1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_17.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.ffn.down_proj.kernel", "write_shape": 
[2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_18.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_19.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": 
"layers_2.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_2.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, 
{"array_metadata": {"param_name": "layers_20.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_20.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_21.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 
1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_22.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.ffn.gate_proj.kernel", "write_shape": [1024, 
2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_23.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_3.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.attn.q_proj.kernel", 
"write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_4.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": 
"layers_5.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_5.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_6.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": 
{"param_name": "layers_7.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_7.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_8.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, 
{"array_metadata": {"param_name": "layers_8.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.attn.k_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.attn.o_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.attn.q_proj.kernel", "write_shape": [1024, 1024], "chunk_shape": [1024, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.attn.v_proj.kernel", "write_shape": [1024, 128], "chunk_shape": [1024, 128], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.attn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.ffn.down_proj.kernel", "write_shape": [2560, 1024], "chunk_shape": [2560, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.ffn.gate_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.ffn.up_proj.kernel", "write_shape": [1024, 2560], "chunk_shape": [1024, 2560], "ext_metadata": null}}, {"array_metadata": {"param_name": "layers_9.ffn_norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "lm_head.kernel", "write_shape": [1024, 50264], "chunk_shape": [1024, 50264], "ext_metadata": null}}, {"array_metadata": {"param_name": "norm.scale", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}]}
checkpoints/best_3/d/9a6cd4e23cc437560f4be43e407544a4 ADDED
Binary file (2.42 kB). View file
 
checkpoints/best_3/manifest.ocdbt ADDED
Binary file (118 Bytes). View file
 
checkpoints/best_3/ocdbt.process_0/d/1f41e6f008d87b3a50ab2046a8e1280f ADDED
Binary file (7.37 kB). View file
 
checkpoints/best_3/ocdbt.process_0/d/46e2551f5dd7536d3e81dd47070dc745 ADDED
Binary file (466 Bytes). View file
 
checkpoints/best_3/ocdbt.process_0/d/6e762a96809abacf91891c20dd7ee254 ADDED
Binary file (548 Bytes). View file
 
checkpoints/best_3/ocdbt.process_0/d/891959d1e11f5ef2a7547a657abf24c4 ADDED
Binary file (494 Bytes). View file
 
checkpoints/best_3/ocdbt.process_0/d/b8d7939c37252e2b3d5a7e68ee214fc0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c89525f2da8cd8ddca1da6703c10ad9a66611526d11bef460357fb9da7c7f2e
3
+ size 186044416
checkpoints/best_3/ocdbt.process_0/d/d4e739ac4385d955f1ad54a73ca010f5 ADDED
Binary file (197 Bytes). View file
 
checkpoints/best_3/ocdbt.process_0/d/e327a7eb4404a128ff897bb7a70e6ea1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d93cb0e7bdfe4e5658288136db0cccbf511962b1e8ac5ff0a4632816c5d4cb7
3
+ size 46616576
checkpoints/best_3/ocdbt.process_0/d/eaf2aaa36d4cb03aaec58caba6b74b86 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c563b110f82595c0412cf6d2406b7abac3afdc73cc612e53851cb896449976c
3
+ size 1052184576
checkpoints/best_3/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (428 Bytes). View file
 
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "sam1",
3
+ "vocab_size": 50264,
4
+ "d_model": 1024,
5
+ "n_layers": 24,
6
+ "n_heads": 16,
7
+ "n_kv_heads": 2,
8
+ "ff_dim": 2560,
9
+ "max_len": 1024,
10
+ "dropout": 0.1,
11
+ "rope_theta": 10000.0,
12
+ "use_yarn": true,
13
+ "yarn_scale": 1.0,
14
+ "yarn_alpha": 1.0,
15
+ "yarn_beta": 32.0,
16
+ "use_alibi": true,
17
+ "alibi_weight": 0.3,
18
+ "architectures": [
19
+ "SAM1ForCausalLM"
20
+ ],
21
+ "bos_token_id": 50256,
22
+ "eos_token_id": 50256
23
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac99b19907200d6decca70946bc7e4dca2263a840dfa765f40b73ca1242da8e8
3
+ size 1393452904
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "GPT2Tokenizer",
3
+ "bos_token": "<|endoftext|>",
4
+ "eos_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>",
6
+ "pad_token": "<|endoftext|>",
7
+ "add_prefix_space": false,
8
+ "model_max_length": 1024,
9
+ "vocab_size": 50264,
10
+ "chat_template": "User: {{input}}\nSam: {{output}}"
11
+ }
training_history.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ train_loss,train_ce,train_z,train_acc,train_ppl,val_loss,val_ce,val_z,val_acc,val_ppl
2
+ 2.985950469970703,2.973674774169922,0.01227566134184599,0.5944549441337585,315.6730041503906,2.0501346588134766,2.04276704788208,0.007367574144154787,0.7185661196708679,8.133447647094727
3
+ 1.7689799070358276,1.7676150798797607,0.001364821451716125,0.7626737356185913,13.322831153869629,1.5867314338684082,1.5862715244293213,0.00045985079486854374,0.7947856187820435,5.030083179473877
4
+ 1.5924392938613892,1.5919979810714722,0.000441351585322991,0.7927579879760742,6.2361860275268555,1.5429564714431763,1.5425934791564941,0.0003629381244536489,0.803358793258667,4.80610466003418