aws-neuron
/

optimum-neuron-cache

dacorvo HF Staff committed on Aug 8, 2025

Commit

28ae1e4

verified ·

1 Parent(s): b76c1a7

Update inference-cache-config/smollm3.json

Files changed (1) hide show

inference-cache-config/smollm3.json CHANGED Viewed

@@ -1,21 +1,13 @@
 {
-   "HuggingFaceTB/SmolLM-360M-Instruct": [
-    {
       "batch_size": 1,
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
     },
-    {
-      "batch_size": 4,
-      "sequence_length": 4096,
-      "num_cores": 2,
-      "auto_cast_type": "bf16"
-    }
-  ],
-  "HuggingFaceTB/SmolLM-3B-Instruct": [
    {
-      "batch_size": 1,
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"

 {
+ "HuggingFaceTB/SmolLM-3B-Instruct": [
+   {
       "batch_size": 1,
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"
     },
    {
+      "batch_size": 4,
       "sequence_length": 4096,
       "num_cores": 2,
       "auto_cast_type": "bf16"