meta-llama-Llama-3.2-1B-neuron / neuron_config.json
badaoui's picture
badaoui HF Staff
Add Neuron-optimized files for meta-llama/Llama-3.2-1B
c447db0 verified
{
"_serialized_key": "NxDNeuronConfig",
"batch_size": 1,
"capacity_factor": null,
"checkpoint_id": "meta-llama/Llama-3.2-1B",
"checkpoint_revision": "4e20de362430cd3b72f300e6b0f18e50e7166e08",
"continuous_batching": false,
"ep_degree": 1,
"fused_qkv": true,
"glu_mlp": true,
"local_ranks_size": 4,
"max_batch_size": 1,
"max_context_length": 128,
"max_topk": 256,
"n_active_tokens": 128,
"neuronxcc_version": "2.21.18209.0+043b1bf7",
"on_device_sampling": true,
"optimum_neuron_version": "0.4.1",
"output_logits": false,
"pp_degree": 1,
"sequence_length": 128,
"speculation_length": 0,
"start_rank_id": 0,
"target": "inf2",
"torch_dtype": "bfloat16",
"tp_degree": 4
}