Synchronizing local compiler cache.
Browse files- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1be46ac7c7fbd3ee3e56.json +77 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_13d43d0be39ff7b1d66f+bfe5714b/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_13d43d0be39ff7b1d66f+bfe5714b/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/wrapped_neff.hlo +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_6a90845afeaebacf0657+165e9558/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/wrapped_neff.hlo +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_ae3b9f9004e7c7b6217d+bfe5714b/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_ae3b9f9004e7c7b6217d+bfe5714b/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/model.neff +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/wrapped_neff.hlo +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_f4c8e9585d2c42f74ede+bfe5714b/model.hlo_module.pb +1 -1
- neuronxcc-2.17.194.0+d312836f/MODULE_f4c8e9585d2c42f74ede+bfe5714b/model.neff +1 -1
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev7/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/1be46ac7c7fbd3ee3e56.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 128,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 4096,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 14336,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 16,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
|
| 26 |
+
"checkpoint_revision": "6a6f4aa4197940add57724a7707d069478df56b1",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 8,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 16,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.2.0.dev7",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"padding_side": "right",
|
| 48 |
+
"pp_degree": 1,
|
| 49 |
+
"qk_layernorm": false,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"rpl_reduce_dtype": "bfloat16",
|
| 52 |
+
"sequence_length": 4096,
|
| 53 |
+
"sequence_parallel_enabled": false,
|
| 54 |
+
"speculation_length": 0,
|
| 55 |
+
"start_rank_id": 0,
|
| 56 |
+
"target": null,
|
| 57 |
+
"torch_dtype": "bfloat16",
|
| 58 |
+
"tp_degree": 8,
|
| 59 |
+
"vocab_parallel": false
|
| 60 |
+
},
|
| 61 |
+
"num_attention_heads": 32,
|
| 62 |
+
"num_hidden_layers": 32,
|
| 63 |
+
"num_key_value_heads": 8,
|
| 64 |
+
"pretraining_tp": 1,
|
| 65 |
+
"rms_norm_eps": 1e-05,
|
| 66 |
+
"rope_scaling": {
|
| 67 |
+
"factor": 8.0,
|
| 68 |
+
"high_freq_factor": 4.0,
|
| 69 |
+
"low_freq_factor": 1.0,
|
| 70 |
+
"original_max_position_embeddings": 8192,
|
| 71 |
+
"rope_type": "llama3"
|
| 72 |
+
},
|
| 73 |
+
"rope_theta": 500000.0,
|
| 74 |
+
"tie_word_embeddings": false,
|
| 75 |
+
"use_cache": true,
|
| 76 |
+
"vocab_size": 128256
|
| 77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/MODULE_13d43d0be39ff7b1d66f+bfe5714b/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919863
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d88626922cb8213ee6e6d92ed79372583a5764e99c2b8db9df44986854b3c13
|
| 3 |
size 919863
|
neuronxcc-2.17.194.0+d312836f/MODULE_13d43d0be39ff7b1d66f+bfe5714b/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 32646144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5313baebec9cd52b2aef524565824a15ca9b21956cf2e144445f75acc8cedd9
|
| 3 |
size 32646144
|
neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 776785
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ad1f4ff795798cade75282ae94a7ac55f76202dc01647366eafa41bfc21cc9b
|
| 3 |
size 776785
|
neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4619264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51cbd827c200db4053c928cbfda9df76574cd38f07753405bdf8402e844e8fcf
|
| 3 |
size 4619264
|
neuronxcc-2.17.194.0+d312836f/MODULE_5dc7b4d69bf22af2563b+165e9558/wrapped_neff.hlo
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4757208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98bb3c5284bfca0e61ec7f73a869320ae27c5ecb4e25f81913b4b346403a184e
|
| 3 |
size 4757208
|
neuronxcc-2.17.194.0+d312836f/MODULE_6a90845afeaebacf0657+165e9558/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 756302
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a248564bc8d77e6620b1bedf41885c8476b5052a11425ae8c2d2a2c41edbc58
|
| 3 |
size 756302
|
neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 776801
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:904d71957b19028da2c781a63244b59fc29f47f35686eaa251e1e3e587c423a5
|
| 3 |
size 776801
|
neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5889024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5104527371912f7a274041040be4a5fca1ff3aa1d9dbab8ea1dd8be64a077275
|
| 3 |
size 5889024
|
neuronxcc-2.17.194.0+d312836f/MODULE_869f8ba284bcbaf85b47+165e9558/wrapped_neff.hlo
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6026968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85bfb3654b5a53f4648114d5da0adfde5940b94af89912002ac49b460aee5bc9
|
| 3 |
size 6026968
|
neuronxcc-2.17.194.0+d312836f/MODULE_ae3b9f9004e7c7b6217d+bfe5714b/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919863
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce184a495c188410ffa88943169eaa42ff53fe4af26e4320761121151a315f5c
|
| 3 |
size 919863
|
neuronxcc-2.17.194.0+d312836f/MODULE_ae3b9f9004e7c7b6217d+bfe5714b/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 32646144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f5d3e0bf2e68808fee89a68129dd26730b841de2303073bb99a9c784b78d350
|
| 3 |
size 32646144
|
neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 776785
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13271c54678f04fa24e2be29ff6f06a03d085a9626eb7fe5fa08786e2734fb9a
|
| 3 |
size 776785
|
neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5192704
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6a507d84bc08456b9be7df735f1feb3da9afecb2d3be0ab4277795bdacf9d4f
|
| 3 |
size 5192704
|
neuronxcc-2.17.194.0+d312836f/MODULE_f303673a5c27cd35d2e8+165e9558/wrapped_neff.hlo
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5330648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1371f131b28cf6f074e0f3264e9d7487016c4e4e8ee7b4f80980c07d84067358
|
| 3 |
size 5330648
|
neuronxcc-2.17.194.0+d312836f/MODULE_f4c8e9585d2c42f74ede+bfe5714b/model.hlo_module.pb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919863
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:514435fea3eb282a99100018f5e7df88beca2bc96c0231a502cd289bd871301a
|
| 3 |
size 919863
|
neuronxcc-2.17.194.0+d312836f/MODULE_f4c8e9585d2c42f74ede+bfe5714b/model.neff
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 32646144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef04be1a5382cc99d58226d5a88981bc93d29da18f62e79529a3a3892843c62c
|
| 3 |
size 32646144
|